# -*- coding: utf-8 -*-
import scrapy
from scrapy.loader import ItemLoader, Identity
from spiders.items import CoserItem


class BcySpider(scrapy.Spider):
    """Spider that loads a bcy.net coser detail page into a ``CoserItem``.

    The crawl starts from the single detail URL listed in ``start_urls``;
    ``parse`` does not schedule any follow-up requests.
    """

    # Spider name.
    name = 'bcy'
    # Domains this spider is allowed to crawl.
    allowed_domains = ['bcy.net']
    # Pages the crawl starts from.
    start_urls = ('http://bcy.net/coser/detail/9495/130440',)
    # Per-spider pipeline configuration.
    custom_settings = {
        'ITEM_PIPELINES': {
            # Alternative pipeline, currently disabled:
            # 'spiders.pipelines.TestPipeline.TestPipeline': 1,
            'spiders.pipelines.OkexPipeline.OkexPipeline': 1,
        },
    }

    def parse(self, response):
        """Build a ``CoserItem`` from a detail-page response.

        Fills the ``name`` and ``info`` fields via XPath, collects the
        ``src`` of every matching detail image into ``image_urls`` (with the
        ``/w650`` segment removed), and records the page address in ``url``.

        Args:
            response: The downloaded page handed over by Scrapy.

        Returns:
            The item produced by the loader's ``load_item()``.
        """
        title_xpath = "//h1[@class='js-post-title']/text()"
        info_xpath = (
            "//div[@class='post__info']"
            "/div[@class='post__type post__info-group']/span/text()"
        )
        image_src_xpath = '//img[@class="detail_std detail_clickable"]/@src'

        loader = ItemLoader(item=CoserItem(), response=response)
        loader.add_xpath('name', title_xpath)
        loader.add_xpath('info', info_xpath)

        # Drop the '/w650' path segment from each image address
        # (presumably a width-limited variant -- TODO confirm).
        image_urls = [
            src.replace('/w650', '')
            for src in loader.get_xpath(image_src_xpath)
        ]
        loader.add_value('image_urls', image_urls)
        loader.add_value('url', response.url)
        return loader.load_item()
